# utils.py
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain_openai.llms import OpenAI
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.schema import Document
import pinecone
from langchain_community.vectorstores.chroma import Chroma
from pypdf import PdfReader
from langchain.llms.openai import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain import HuggingFaceHub


# Extract Information from PDF file
def get_pdf_text(pdf_doc):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_doc: The PDF to read; handed unchanged to ``PdfReader``.

    Returns:
        The ``extract_text()`` result of each page, concatenated in page
        order with no separator between pages. Empty string when the
        reader exposes no pages.
    """
    reader = PdfReader(pdf_doc)
    # One join instead of a per-page ``+=`` avoids rebuilding the growing
    # string on every iteration.
    return "".join(page.extract_text() for page in reader.pages)


# Iterate over the PDF files that the user uploaded, one by one,
# and wrap each of them in a Document
def create_docs(user_pdf_list, unique_id):
    """Wrap each uploaded PDF in a ``Document`` holding its text and metadata.

    Args:
        user_pdf_list: Iterable of uploaded file objects. Each one is read
            with ``get_pdf_text`` and must expose the attributes ``name``,
            ``file_id``, ``type`` and ``size``.
        unique_id: Value stored under ``"unique_id"`` in the metadata of
            every document created by this call.

    Returns:
        A list with one ``Document`` per uploaded file, in input order.
    """
    return [
        Document(
            page_content=get_pdf_text(uploaded),
            # Extra information stored alongside the extracted text.
            metadata={
                "name": uploaded.name,
                # ``file_id`` is used here because the 'UploadedFile'
                # object has no attribute 'id'.
                "id": uploaded.file_id,
                # NOTE(review): the key really is "type=" (with a trailing
                # "="). It looks like a typo for "type"; confirm that no
                # reader depends on it before renaming.
                "type=": uploaded.type,
                "size": uploaded.size,
                "unique_id": unique_id,
            },
        )
        for uploaded in user_pdf_list
    ]


# Create embeddings instance
def create_embeddings_load_data():
    """Build and return a new ``OpenAIEmbeddings`` instance with default settings."""
    return OpenAIEmbeddings()


# Directory passed to Chroma as ``persist_directory`` by the vector store
# helpers in this module.
persist_directory: str = "db"


# Function to push data to the vector store (now Chroma; the old Pinecone signature is kept below for reference)
# def push_to_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings, docs):
def push_to_pinecone(docs, embeddings):
    """Embed ``docs`` into a Chroma store under ``persist_directory`` and persist it.

    Despite its name, this function no longer talks to Pinecone: the store
    is created with ``Chroma.from_documents``. The Pinecone-era name is
    kept so existing callers keep working.

    Args:
        docs: Documents to embed and store.
        embeddings: Embedding object handed to Chroma.

    Returns:
        The Chroma vector store that was created.
    """
    store = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    # Explicitly persist the store before handing it back.
    store.persist()
    return store


# Function to load the persisted vector store (now Chroma; the old Pinecone signature is kept below for reference)
# def pull_from_pinecone(pinecone_apikey,pinecone_environment,pinecone_index_name,embeddings):
def pull_from_pinecone(embeddings):
    """Open the Chroma store located at ``persist_directory``.

    Despite its name, this function no longer talks to Pinecone; the
    Pinecone-era name is kept so existing callers keep working.

    Args:
        embeddings: Embedding object passed to Chroma as its
            ``embedding_function``.

    Returns:
        A ``Chroma`` instance bound to ``persist_directory``.
    """
    return Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )


# Function to help us get relevant documents from the vector store - based on user input
# def similar_docs(query,k,pinecone_apikey,pinecone_environment,pinecone_index_name,embeddings,unique_id):
def similar_docs(query, k, embeddings, unique_id):
    """Search the vector store for the documents most similar to ``query``.

    Args:
        query: Search text forwarded to ``similarity_search_with_score``.
        k: Number of results to request; converted with ``int()`` first.
        embeddings: Embedding object used to open the store via
            ``pull_from_pinecone``.
        unique_id: Only entries whose ``"unique_id"`` metadata equals this
            value are requested (sent as the search filter).

    Returns:
        Whatever ``similarity_search_with_score`` returns - presumably a
        list of (document, score) pairs; confirm against the vector store
        API in use.
    """
    store = pull_from_pinecone(embeddings)
    result_count = int(k)
    # Third positional argument of the search call: the metadata filter.
    metadata_filter = {"unique_id": unique_id}
    return store.similarity_search_with_score(query, result_count, metadata_filter)


# Helps us get the summary of a document
def get_summary(current_doc):
    """Summarize a single document with a "map_reduce" summarization chain.

    Args:
        current_doc: The document to summarize.

    Returns:
        The value produced by the chain's ``run`` call for that document.
    """
    # An ``OpenAI`` LLM with temperature 0 drives the chain. A previous
    # alternative was HuggingFaceHub(repo_id="bigscience/bloom",
    # model_kwargs={"temperature": 1e-10}).
    summarizer = load_summarize_chain(OpenAI(temperature=0), chain_type="map_reduce")
    # The chain is run on a list, so the single document is wrapped in one.
    return summarizer.run([current_doc])
